Sli.do_room
library(xml2)
library(jsonlite)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(purrr)
library(data.table)
library(plotly)
library(lubridate) # time formatting
library(zoo) # time formatting
typhoon_all <- read.csv("https://raw.githubusercontent.com/jason19970210/BigDataAnalyticalMethods/master/Final/Data/typhoon/typhoon_web_table.csv",stringsAsFactors = F)
eq_xml_url_base <- paste("https://raw.githubusercontent.com/jason19970210/BigDataAnalyticalMethods/master/Final/Data/earthquake/CWB-EQ-Catalog-%d","xml",sep = ".")
sea_level <- fromJSON("https://opendata.cwb.gov.tw/fileapi/v1/opendataapi/C-B0048-001?Authorization=CWB-64CBB768-EE64-4FD2-AED5-0A68D1A48B79&downloadType=WEB&format=JSON")
seatemp <- fromJSON("https://opendata.cwb.gov.tw/fileapi/v1/opendataapi/C-B0050-001?Authorization=CWB-64CBB768-EE64-4FD2-AED5-0A68D1A48B79&downloadType=WEB&format=JSON")
map_df(1990:2018, function(i){
xml_url <- read_xml(sprintf(eq_xml_url_base,i))
eqinfo <- xml_find_all(xml_url, ".//earthquakeinfo")
#tibble::tibble
data.frame(originTime = xml_text(xml_find_first(eqinfo, ".//originTime")),
epicenterLon = xml_text(xml_find_first(eqinfo, ".//epicenterLon")),
epicenterLat = xml_text(xml_find_first(eqinfo, ".//epicenterLat")),
depth = xml_text(xml_find_first(eqinfo, ".//depth")),
magnitudeValue = xml_text(xml_find_first(eqinfo, ".//magnitudeValue")),
gap = xml_text(xml_find_first(eqinfo, "./gap")),
rms = xml_text(xml_find_first(eqinfo, "./rms")),
erh = xml_text(xml_find_first(eqinfo, "./erh")),
erz = xml_text(xml_find_first(eqinfo, "./erz")),
quality = xml_text(xml_find_first(eqinfo, "./quality")),
stringsAsFactors = FALSE #doesnt have this parameter in tibble
) #end of data.frame
}) -> eq_df
sea_level <- sea_level$Cwbopendata$dataset$location
sea_level <- data.table(sea_level)
sea_level[, number := 1:nrow(sea_level)]
sea_level[, yyyymm := ItemValue[[.N]][[1]], by = number]
sea_level[, MeanSeaLevel := ItemValue[[.N]][2], by = number]
sea_level[, MeanSeaLevel_unit := ItemUnit[[.N]][1], by = number]
sea_level[, HHWL := ItemValue[[.N]][3], by = number]
sea_level[, HHWL_unit := ItemUnit[[.N]][2], by = number]
sea_level[, LLWL := ItemValue[[.N]][4], by = number]
sea_level[, LLWL_unit := ItemUnit[[.N]][3], by = number]
sea_level <- select(sea_level, SiteName, SiteId, yyyymm, MeanSeaLevel, MeanSeaLevel_unit, HHWL, HHWL_unit, LLWL, LLWL_unit)
seatemp <- seatemp$Cwbopendata$dataset$location
sea_test2 <- data.frame(obsrtime = NA,
MonthAvg = NA,
MonthHigh = NA,
MonthHighTime = NA,
MonthLow = NA,
MonthLowTime = NA,
LocationName = NA,
StationID = NA,
lon = NA,
lat = NA)
for (j in 1:nrow(seatemp)){
sea_test <- data.frame(obsrtime = NA,
MonthAvg = NA,
MonthHigh = NA,
MonthHighTime = NA,
MonthLow = NA,
MonthLowTime = NA)
for (i in 1:length(seatemp$time[[j]]$obsrtime)) {
sea_temp <- c(seatemp$time[[j]]$obsrtime[i], seatemp$time[[j]]$weatherElement$elementValue[[i]]$vlaue)
sea_test <- rbind(sea_test, sea_temp)
}
sea_test <- sea_test[-1,]
sea_test$LocationName <- seatemp$LocationName[j]
sea_test$StationID <- seatemp$StationID[j]
sea_test$lon <- seatemp$lon[j]
sea_test$lat <- seatemp$lat[j]
sea_test2 <- rbind(sea_test2, sea_test)
}
sea_test2 <- sea_test2[-1,]
seatemp <- select(sea_test2,
LocationName,
StationID,
lon,
lat,
obsrtime,
MonthAvg,
MonthHigh,
MonthHighTime,
MonthLow,
MonthLowTime)
eq_df$originTime <- as.Date(eq_df$originTime)
eq_df$epicenterLon <- as.numeric(eq_df$epicenterLon)
eq_df$epicenterLat <- as.numeric(eq_df$epicenterLat)
eq_df$depth <- as.numeric(eq_df$depth)
eq_df$magnitudeValue <- as.numeric(eq_df$magnitudeValue)
eq_df$gap <- as.numeric(eq_df$gap)
eq_df$rms <- as.numeric(eq_df$rms)
eq_df$erh <- as.numeric(eq_df$erh)
eq_df$erz <- as.numeric(eq_df$erz)
# Without NA error message
sea_level$year <- as.numeric(substring(sea_level$yyyymm, 1,4))
sea_level$month <- as.numeric(substring(sea_level$yyyymm, 5,6))
sea_level$MeanSeaLevel <- as.numeric(sea_level$MeanSeaLevel)
sea_level$HHWL <- as.numeric(sea_level$HHWL)
sea_level$LLWL <- as.numeric(sea_level$LLWL)
# Without NA error message
sea_level <- sea_level %>% filter(.$month != 0) %>% filter(.$year <= 2017 & .$year >= 2012)
seatemp$lon <- as.numeric(seatemp$lon)
seatemp$lat <- as.numeric(seatemp$lat)
seatemp$obsrtime_year <- as.numeric(substring(seatemp$obsrtime, 1,4))
seatemp$MonthAvg <- as.numeric(seatemp$MonthAvg)
seatemp$MonthHigh <- as.numeric(seatemp$MonthHigh)
seatemp$MonthLow <- as.numeric(seatemp$MonthLow)
# Without NA error message
sub_level <- sea_level %>% filter(.$SiteName %in% c("高雄市 高雄","連江縣 馬祖","基隆市 基隆","臺中市 臺中港","臺東縣 蘭嶼")) %>% as.data.table()
sub_temp <- seatemp %>% filter(.$LocationName %in% c("高雄","馬祖","基隆","臺中港","蘭嶼"))
data.table(sub_level)
## SiteName SiteId yyyymm MeanSeaLevel MeanSeaLevel_unit HHWL
## 1: 臺東縣 蘭嶼 1396 201201 -67.9 cm NA
## 2: 臺東縣 蘭嶼 1396 201202 -52.6 cm NA
## 3: 臺東縣 蘭嶼 1396 201203 -45.0 cm NA
## 4: 臺東縣 蘭嶼 1396 201204 -47.1 cm NA
## 5: 臺東縣 蘭嶼 1396 201205 -37.8 cm NA
## ---
## 344: 連江縣 馬祖 1926 201707 -111.9 cm 199.6
## 345: 連江縣 馬祖 1926 201708 -105.8 cm 208.9
## 346: 連江縣 馬祖 1926 201709 -90.1 cm 200.1
## 347: 連江縣 馬祖 1926 201711 -92.2 cm 210.7
## 348: 連江縣 馬祖 1926 201712 -104.9 cm 214.7
## HHWL_unit LLWL LLWL_unit year month
## 1: cm NA cm 2012 1
## 2: cm NA cm 2012 2
## 3: cm NA cm 2012 3
## 4: cm NA cm 2012 4
## 5: cm NA cm 2012 5
## ---
## 344: cm -454.1 cm 2017 7
## 345: cm -435.5 cm 2017 8
## 346: cm -411.4 cm 2017 9
## 347: cm -431.9 cm 2017 11
## 348: cm -480.3 cm 2017 12
sub_level[,LocationName := ifelse(SiteName == "高雄市 高雄", "高雄",
ifelse(SiteName == "連江縣 馬祖", "馬祖",
ifelse(SiteName == "基隆市 基隆", "基隆",
ifelse(SiteName == "臺中市 臺中港", "臺中港",
ifelse(SiteName == "臺東縣 蘭嶼", "蘭嶼" ,"")
)
)
)
)
]
sub_level$yyyymm <- paste0(sub_level$year,"/",sub_level$month)
sub_level_temp <- left_join(sub_level,sub_temp, by = c("LocationName","yyyymm"="obsrtime"))
cor(sub_level_temp$MeanSeaLevel, sub_level_temp$MonthAvg, use = "complete.obs")
## [1] 0.3998575
plot_ly(x = ~sub_level_temp$MonthAvg, y = ~sub_level_temp$MeanSeaLevel, type = 'scatter', mode = 'markers', color = ~sub_level_temp$LocationName)
## Warning: Ignoring 7 observations
plot_ly(x = ~sub_level_temp$lat, y = ~sub_level_temp$lon, z = ~sub_level_temp$MeanSeaLevel, type = 'scatter3d', color = ~sub_level_temp$LocationName)
## Warning: Ignoring 6 observations
# 相關係數級距:中度相關
plot_ly(x = ~sub_level_temp$lat, y = ~sub_level_temp$lon, z = ~sub_level_temp$MonthAvg, type = 'scatter3d', color = ~sub_level_temp$LocationName)
## Warning: Ignoring 7 observations
cor.test(sub_level_temp$MeanSeaLevel, sub_level_temp$MonthAvg, use = "complete.obs")
##
## Pearson's product-moment correlation
##
## data: sub_level_temp$MeanSeaLevel and sub_level_temp$MonthAvg
## t = 8.0322, df = 339, p-value = 1.598e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3066752 0.4854478
## sample estimates:
## cor
## 0.3998575
sub_level_temp$ym <- gsub("/","-",sub_level_temp$yyyymm)
sub_level_temp$ym <- as.Date(as.yearmon(sub_level_temp$ym))
sub_level_temp %>% plot_ly(x = ~.$ym, y = ~.$MonthAvg, type = 'scatter', mode = 'lines+markers', color = ~.$LocationName)
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.